


# Restore the objects stored in Panel_Full.RData; everything below works on
# the one called `Panel`.
load(file = "Panel_Full.RData")

# Very old cars carry no type approval code: drop them
Panel <- subset(Panel, subset = typegoedkeuringsnummer != "")

# recalled = 1 when the row has a recall reference code (vehicle ever recalled),
# 0 otherwise
Panel$recalled <- as.numeric(!is.na(Panel$referentiecoderdw))

# Categorical (factor) versions of date, vehicle age and vehicle plate.
# date and kenteken are converted in place; vehicle age gets a separate
# column so the original age_car is kept as it is.
Panel$date      <- as.factor(Panel$date)
Panel$age_car_d <- as.factor(Panel$age_car)
Panel$kenteken  <- as.factor(Panel$kenteken)

# Pre-treatment dummies, one per three-month window of rec_new_dist_both.
# Every dummy starts at 0 and is set to 1 only on rows inside its window;
# which() skips rows where rec_new_dist_both is NA, so those rows stay 0.
# The window closest to treatment (-2..0) is not created; its code is kept
# commented out:
#Panel$rec_precat1<-0
#Panel$rec_precat1[Panel$rec_new_dist_both<=0 & Panel$rec_new_dist_both>=-2]<-1
Panel$rec_precat2 <- 0
Panel$rec_precat2[with(Panel, which(rec_new_dist_both >= -5 & rec_new_dist_both <= -3))] <- 1
Panel$rec_precat3 <- 0
Panel$rec_precat3[with(Panel, which(rec_new_dist_both >= -8 & rec_new_dist_both <= -6))] <- 1
# Open-ended window: everything at distance -9 or earlier
Panel$rec_precat4 <- 0
Panel$rec_precat4[with(Panel, which(rec_new_dist_both <= -9))] <- 1

# Post-treatment dummies, again in three-month windows
Panel$rec_postcat1 <- 0
Panel$rec_postcat1[with(Panel, which(rec_new_dist_both >= 1 & rec_new_dist_both <= 3))] <- 1
Panel$rec_postcat2 <- 0
Panel$rec_postcat2[with(Panel, which(rec_new_dist_both >= 4 & rec_new_dist_both <= 6))] <- 1
Panel$rec_postcat3 <- 0
Panel$rec_postcat3[with(Panel, which(rec_new_dist_both >= 7 & rec_new_dist_both <= 9))] <- 1
# Open-ended window: everything at distance 10 or later
Panel$rec_postcat4 <- 0
Panel$rec_postcat4[with(Panel, which(rec_new_dist_both >= 10))] <- 1

# Identifier factors at increasing levels of detail. Each finer id is built by
# pasting one more field (space-separated) onto the previous, coarser id.

# Type: the type approval code
Panel$id_type <- as.factor(Panel$typegoedkeuringsnummer)
# Variant: type + variant
Panel$id_var <- as.factor(paste(Panel$id_type, Panel$variant, sep = " "))
# Version: type + variant + uitvoering
Panel$id_ver <- as.factor(paste(Panel$id_var, Panel$uitvoering, sep = " "))

# Interactions of the ids with time and with the recalled dummy
Panel$id_ver_date  <- as.factor(paste(Panel$id_ver, Panel$date, sep = " "))
Panel$id_ver_rec   <- as.factor(paste(Panel$id_ver, Panel$recalled, sep = " "))
Panel$id_type_date <- as.factor(paste(Panel$id_type, Panel$date, sep = " "))

# Distance from the last sale in whole years (difflastsale divided by 12 and
# rounded down), stored as a factor
Panel$difflastsaleyr_d <- as.factor(floor(Panel$difflastsale / 12))

# Create real list price
# Load consumer price index data; load() restores the objects stored in the
# file and the code below uses the one called `HCPI`.
# NOTE(review): month() and year() are not defined in this script, so they must
# come from a package attached before it runs (e.g. lubridate or data.table) -
# confirm.
load("HCPI.RData")
HCPI$date <- as.Date(HCPI$date, format = "%m/%d/%Y")
HCPI$month <- month(HCPI$date)
HCPI$year <- year(HCPI$date)

# Add the columns catalogusprijs, merk and handelsbenaming to every panel row,
# matched on kenteken. Left join: panel rows without a match are kept (with NA).
load("Link_Char.RData")
Link_Char <- subset(Link_Char, select = c(kenteken, catalogusprijs, merk, handelsbenaming))

Panel <- merge(Panel, Link_Char, by = "kenteken", all.x = TRUE, all.y = FALSE)

# Get real price at the first registration: match each row to the price index
# of the month and year of datumeerstetoelating.
Panel$month <- month(Panel$datumeerstetoelating)
Panel$year <- year(Panel$datumeerstetoelating)
HCPI$date <- NULL
# Left join on purpose (all.y = FALSE). With all.y = TRUE every index month in
# which no vehicle was first registered would be appended as a row that is NA
# in all panel columns, and such a phantom row would survive into the final
# panel.
Panel <- merge(Panel, HCPI, by = c("month", "year"), all.x = TRUE, all.y = FALSE)
# hcpi / 100 turns the index into a deflator (assumes the index is expressed
# with base 100 - TODO confirm); the result is rounded to whole units.
Panel$realprice <- round(Panel$catalogusprijs / (Panel$hcpi / 100))
# The month/year helper columns were only needed as merge keys
Panel$year <- NULL
Panel$month <- NULL

# Check: the panel should hold one row per kenteken and date. Tabulate how many
# repeated kenteken/date pairs the merges produced, then keep only the first
# row of each pair.
Panel$dupl <- duplicated(cbind(Panel$kenteken, Panel$date))
table(Panel$dupl)
Panel <- subset(Panel, dupl == FALSE)
Panel$dupl <- NULL

#126698700 (presumably the number of rows in Panel at this point in the original run - TODO confirm)

# Add APKtot from APKtot_recent to the panel, matched on kenteken and date.
load(file = "APKtot_recent.RData")

# Both sides are matched on character versions of the keys. On the panel side
# temporary copies (kenteken1, date1) are used, so the kenteken and date
# columns of Panel keep their current type.
APKtot_recent$date     <- as.character(APKtot_recent$date)
APKtot_recent$kenteken <- as.character(APKtot_recent$kenteken)
Panel$date1     <- as.character(Panel$date)
Panel$kenteken1 <- as.character(Panel$kenteken)

# Left join: every panel row is kept
Panel <- merge(
  Panel, APKtot_recent,
  by.x = c("kenteken1", "date1"),
  by.y = c("kenteken", "date"),
  all.x = TRUE, all.y = FALSE
)

# Panel rows without a match in APKtot_recent get APKtot = 0 instead of NA
Panel$APKtot <- replace(Panel$APKtot, is.na(Panel$APKtot), 0)

# Remove the temporary character keys
Panel$date1 <- NULL
Panel$kenteken1 <- NULL

# Rating brand: rating_merk holds a numeric score per brand, assigned wherever
# merk contains the brand pattern. Rows whose merk matches none of the patterns
# keep NA.
# NOTE(review): str_detect() is not defined in this script, so it must come
# from a package attached before it runs (e.g. stringr) - confirm.
Panel$rating_merk <- NA

# Pattern -> score. The patterns are applied in this order, so if a value of
# merk matched more than one pattern the later one would win.
brand_ratings <- c(
  "SUZUKI"     = 8.7,
  "HONDA"      = 8.5,
  "TOYOTA"     = 8.3,
  "HYUNDAI"    = 8.1,
  "MITSUBISHI" = 8.1,
  "KIA"        = 8.0,
  "MAZDA"      = 7.7,
  "DAIHATSU"   = 7.6,
  "NISSAN"     = 7.6,
  "DACIA"      = 7.4,
  "MERCEDES"   = 7.4,
  "FORD"       = 7.2,
  "AUDI"       = 7.1,
  "SUBARU"     = 7.1,
  "FIAT"       = 7.0,
  "BMW"        = 6.9,
  "RENAULT"    = 6.7,
  "OPEL"       = 6.6,
  "VOLVO"      = 6.4,
  "SKODA"      = 6.1,
  "CITRO"      = 5.9,
  "VOLKSWAGEN" = 5.9,
  "SEAT"       = 5.8,
  "PEUGEOT"    = 5.5
)

for (brand in names(brand_ratings)) {
  # i marks the rows whose merk contains the current brand pattern
  i <- str_detect(Panel$merk, brand)
  Panel$rating_merk[i] <- brand_ratings[[brand]]
}

# Store the finished panel under the name Panel_all and write that single
# object to Panel_all_complete.RData
Panel_all <- Panel

save(list = "Panel_all", file = "Panel_all_complete.RData")

